import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# The first command-line argument is a directory: it is added to the import
# search path here and is also used further down to locate melb_data.csv.
# Running the script without that argument raises IndexError on this line.
sys.path.append(sys.argv[1])

import pandas as pd   
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Load the Melbourne housing dataset from the directory given as the first
# command-line argument.
melbourne_housing = pd.read_csv(os.path.join(sys.argv[1], 'melb_data.csv'))

# Keep only the rows whose Suburb column is 'Flemington'.
flemington_housing = melbourne_housing[melbourne_housing['Suburb'] == 'Flemington']
flemington_land_sizes = flemington_housing['Landsize']
average_land_size = np.mean(flemington_land_sizes)

# Draw the box plot on its own figure and close it once saved. pyplot keeps
# reusing the implicit "current" figure, so leaving it open would cause any
# chart drawn later in this script to be rendered on top of this one.
land_fig, land_ax = plt.subplots()
land_ax.boxplot(flemington_land_sizes)
land_ax.set_ylabel('Land Size')
land_ax.set_title('Distribution of Land Sizes in Flemington, Australia')
land_fig.savefig('./ref_result/box_chart.png')
plt.close(land_fig)

print(f"Average land size in Flemington, Australia: {average_land_size:.2f} square meters")


import pandas as pd   
import pickle
import matplotlib.pyplot as plt

# Histogram of the Price column for the Flemington rows (reuses the
# flemington_housing frame built earlier in the script; nothing is loaded here).
flemington_prices = flemington_housing['Price']

# Draw on a dedicated figure: pyplot's implicit current figure may still hold
# an earlier chart, and plt.hist() would then overlay the histogram on it so
# the saved image would contain more than the histogram.
price_fig, price_ax = plt.subplots()
price_ax.hist(flemington_prices, bins='auto', edgecolor='black')
price_ax.set_xlabel('Property Prices')
price_ax.set_ylabel('Frequency')
price_ax.set_title('Distribution of Property Prices in Flemington, Australia')
price_fig.savefig('./ref_result/bar_chart.png')
plt.close(price_fig)


import pandas as pd   
import pickle
import numpy as np

# Land-size summary for the Flemington rows (reuses flemington_housing; nothing
# is loaded here). The chart is written to ./ref_result/box_chart.png,
# replacing any file already at that path.
flemington_land_sizes = flemington_housing['Landsize']
average_land_size = np.mean(flemington_land_sizes)

# Draw on a dedicated figure: pyplot's implicit current figure may still hold
# charts drawn earlier in the script, and plt.boxplot() would be rendered on
# top of them, corrupting the saved image.
box_fig, box_ax = plt.subplots()
box_ax.boxplot(flemington_land_sizes)
box_ax.set_ylabel('Land Size')
box_ax.set_title('Distribution of Land Sizes in Flemington, Australia')
box_fig.savefig('./ref_result/box_chart.png')
plt.close(box_fig)

print(f"Average land size in Flemington, Australia: {average_land_size:.2f} square meters")
 


import pandas as pd
import pickle

# Pairwise correlation matrix of the Price and Distance columns for the
# Flemington rows (reuses flemington_housing; nothing is loaded here).
# NOTE(review): if Distance is constant within a single suburb, the
# off-diagonal entries will be NaN -- verify against the data.
price_distance_columns = ['Price', 'Distance']
correlation_price_distance = flemington_housing.loc[:, price_distance_columns].corr()

# The matrix is only printed; it is not pickled to ./ref_result.
print(correlation_price_distance)




import pandas as pd   
import pickle

# Property-age statistics and renovation candidates for the Flemington rows
# (reuses melbourne_housing and flemington_housing; nothing is loaded here).


def _dump_pickle(obj, name):
    """Pickle *obj* to ./ref_result/<name>.pkl and close the file afterwards."""
    # The context manager guarantees the handle is flushed and closed even if
    # pickling fails; passing a bare open(...) to pickle.dump leaks the handle.
    with open(f"./ref_result/{name}.pkl", "wb") as handle:
        pickle.dump(obj, handle)


# Year of the latest Date value in the whole dataset (dates are parsed as
# day/month/year). Despite the name, max_date holds a year, and it is the
# reference year for every age computed below.
max_date = pd.to_datetime(melbourne_housing['Date'], format='%d/%m/%Y').max().year

# Age of each Flemington property relative to that reference year.
# assign() returns a new DataFrame instead of writing a column into a
# boolean-filtered slice of melbourne_housing, which avoids pandas'
# SettingWithCopyWarning.
flemington_housing = flemington_housing.assign(
    Age=max_date - flemington_housing['YearBuilt']
)
property_ages = flemington_housing['Age']

# Mean, median, first quartile and third quartile of the property ages.
mean_age = property_ages.mean()
median_age = property_ages.median()
first_quartile = property_ages.quantile(0.25)
third_quartile = property_ages.quantile(0.75)

# Print each statistic and persist it under ./ref_result.
print(f"Mean property age: {mean_age:.2f} years")
_dump_pickle(mean_age, "mean_age")

print(f"Median property age: {median_age:.2f} years")
_dump_pickle(median_age, "median_age")

print(f"First quartile: {first_quartile:.2f} years")
_dump_pickle(first_quartile, "first_quartile")

print(f"Third quartile: {third_quartile:.2f} years")
_dump_pickle(third_quartile, "third_quartile")

# Renovation candidates: properties built at least 30 years before the
# reference year. Rows with a missing YearBuilt compare False and are
# therefore excluded.
threshold_year = max_date - 30
buildings_to_renovate = flemington_housing[flemington_housing['YearBuilt'] <= threshold_year]
print("\nBuildings that could be renovated for sustainability:")
print(buildings_to_renovate[['Suburb', 'Address', 'YearBuilt']])
_dump_pickle(buildings_to_renovate, "buildings_to_renovate")
